# Request up to 500 park records from the NPS "parks" endpoint.
# NOTE(review): the API key is embedded in the request URL; consider reading it
# from an environment variable (Sys.getenv()) so it is not kept in the source.
res <- GET("https://developer.nps.gov/api/v1/parks?limit=500&api_key=B9nDpbkbrb3kSOjz6kXSxMJ3d6MSpUvt1QqYdeyn")

# Stop here with the HTTP error if the request failed, instead of letting an
# error payload flow into the parsing and unnesting steps below.
httr::stop_for_status(res)

# Parse the JSON body into a tibble. The encoding is stated explicitly so that
# content() does not have to guess it.
data <- res %>%
  content("text", encoding = "UTF-8") %>%
  jsonlite::fromJSON() %>%
  as_tibble()
# Build the park-level table from the API response: unpack the nested `data`
# column, keep the name, coordinates, topics, activities, states and park code,
# and convert the column names to snake_case.
NPS_data <- data %>%
  unnest(data) %>%
  select(
    fullName, latitude, longitude,
    topics, activities, states, parkCode
  ) %>%
  janitor::clean_names() %>%
  # Coordinates are converted with as.numeric() so they can be plotted.
  mutate(across(c(latitude, longitude), as.numeric)) %>%
  # Expand the nested activities and then the nested topics; the new columns
  # are prefixed "activities_" and "topics_". After both steps a park has one
  # row per activity/topic combination.
  unnest(activities, names_sep = "_") %>%
  unnest(topics, names_sep = "_")
# Read the public-use statistics export, normalise its column names, and keep
# the identifying, date and visit-count columns used in the analysis.
visitation_data <-
  read_csv("data/Query Builder for Public Use Statistics (1979 - Last Calendar Year).csv") %>%
  janitor::clean_names() %>%
  # Lower-case the unit code so it can be joined to the API's park_code.
  mutate(unit_code = tolower(unit_code)) %>%
  # Rename while selecting: park_name -> full_name, unit_code -> park_code.
  select(
    full_name = park_name,
    park_code = unit_code,
    park_type, region, state, year, month,
    recreation_visits, tent_campers, rv_campers, backcountry
  )
# Full join of the park table and the visitation table on park_code, keeping
# rows that appear in only one of the two tables.
combined_data <- NPS_data %>%
  full_join(visitation_data, by = "park_code")
# Recode the visitation table's `region` column from the two-letter `state`
# code. The existing `region` column is overwritten; any state not listed
# below (including a missing state) is labelled "no state data".
region_data_vists <- visitation_data %>%
  mutate(
    region = case_when(
      state %in% c("CT", "MA", "MD", "ME", "NH", "NJ", "NY", "PA", "RI", "VT") ~
        "northeast",
      state %in% c("IA", "IL", "IN", "KS", "MI", "MN", "MO", "ND", "NE", "OH",
                   "SD", "WI") ~
        "midwest",
      state %in% c("AL", "AR", "DC", "DE", "FL", "GA", "KY", "LA", "MS", "NC",
                   "OK", "SC", "TN", "TX", "VA", "WV") ~
        "south",
      state %in% c("AK", "AZ", "CA", "CO", "HI", "ID", "MT", "NM", "NV", "OR",
                   "UT", "WA", "WY") ~
        "west",
      state %in% c("AS", "GU", "PR", "VI") ~
        "u.s. territory",
      TRUE ~ "no state data"
    )
  )
# Label every row of the joined park/visitation data with a broad region based
# on the visitation file's two-letter `state` code (overwriting the `region`
# column that came from the visitation file), then drop the duplicate name
# column and the topic/activity id columns that are not used afterwards.
region_data <- combined_data %>%
  mutate(
    region = case_when(
      state %in%
        c("CT", "RI", "NH", "VT", "NJ", "NY", "PA", "MD", "ME", "MA") ~ "northeast",
      state %in%
        c("IL", "IN", "MI", "OH", "WI", "IA", "KS", "MN", "MO",
          "NE", "ND", "SD") ~ "midwest",
      state %in%
        c("FL", "GA", "NC", "SC", "VA", "DE", "WV", "AL", "KY",
          "MS", "TN", "AR", "LA", "OK", "TX", "DC") ~ "south",
      state %in%
        c("AK", "CA", "HI", "OR", "WA",
          "AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY") ~ "west",
      state %in% c("VI", "AS", "GU", "PR") ~ "u.s. territory",
      # Catch-all for rows whose state is missing or not listed above. The
      # previous condition `state == TRUE` compared a state code with TRUE and
      # therefore never matched, leaving these rows with an NA region.
      TRUE ~ "no state data"
    )
  ) %>%
  select(-full_name.y, -topics_id, -topics_name, -activities_id)
# Show the first six distinct activity names found in northeast rows.
region_data %>%
  filter(region == "northeast") %>%
  distinct(activities_name) %>%
  head(n = 6) %>%
  knitr::kable()
| activities_name |
|---|
| Arts and Culture |
| Cultural Demonstrations |
| Astronomy |
| Stargazing |
| Biking |
| Boating |
# Bar chart of mean tent, backcountry and RV camper counts per season for
# northeast rows, with one facet per camper type.
northeast_plot <- region_data %>%
  filter(region == "northeast") %>%
  mutate(
    # Months 12, 1, 2 -> Winter; 3-5 -> Spring; 6-8 -> Summer. Every other
    # value of `month` (including a missing one) is labelled Fall.
    season = case_when(
      month %in% c(1, 2, 12) ~ "Winter",
      month %in% 3:5 ~ "Spring",
      month %in% 6:8 ~ "Summer",
      TRUE ~ "Fall"
    )
  ) %>%
  group_by(season) %>%
  summarise(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  # Long format: one row per season and camper type, with "mean_" stripped
  # from the type label.
  pivot_longer(
    cols = starts_with("mean_"),
    names_prefix = "mean_",
    names_to = "type_visit",
    values_to = "mean"
  ) %>%
  ggplot(aes(x = season, y = mean)) +
  geom_col() +
  facet_grid(. ~ type_visit) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    title = "Visitation in Northeast by Season",
    x = "Season",
    y = "Average Visitation"
  )

northeast_plot

# Show the first six distinct activity names found in midwest rows.
region_data %>%
  filter(region == "midwest") %>%
  distinct(activities_name) %>%
  head(n = 6) %>%
  knitr::kable()
| activities_name |
|---|
| Arts and Culture |
| Cultural Demonstrations |
| Astronomy |
| Stargazing |
| Food |
| Picnicking |
# Bar chart of mean tent, backcountry and RV camper counts per season for
# midwest rows, with one facet per camper type.
midwest_plot <- region_data %>%
  filter(region == "midwest") %>%
  mutate(
    # Months 12, 1, 2 -> Winter; 3-5 -> Spring; 6-8 -> Summer. Every other
    # value of `month` (including a missing one) is labelled Fall.
    season = case_when(
      month %in% c(1, 2, 12) ~ "Winter",
      month %in% 3:5 ~ "Spring",
      month %in% 6:8 ~ "Summer",
      TRUE ~ "Fall"
    )
  ) %>%
  group_by(season) %>%
  summarise(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  # Long format: one row per season and camper type, with "mean_" stripped
  # from the type label.
  pivot_longer(
    cols = starts_with("mean_"),
    names_prefix = "mean_",
    names_to = "type_visit",
    values_to = "mean"
  ) %>%
  ggplot(aes(x = season, y = mean)) +
  geom_col() +
  facet_grid(. ~ type_visit) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    title = "Visitation in Midwest by Season",
    x = "Season",
    y = "Average Visitation"
  )

midwest_plot

# Show the first six distinct activity names found in south rows.
south_activities <- distinct(
  filter(region_data, region == "south"),
  activities_name
)
knitr::kable(head(south_activities, n = 6))
| activities_name |
|---|
| Astronomy |
| Stargazing |
| Food |
| Picnicking |
| Guided Tours |
| Self-Guided Tours - Walking |
# Bar chart of mean tent, backcountry and RV camper counts per season for
# south rows, with one facet per camper type.
south_plot <-
  region_data %>%
  filter(region == "south") %>%
  mutate(
    # Months 12, 1, 2 -> Winter; 3-5 -> Spring; 6-8 -> Summer. Every other
    # value of `month` (including a missing one) is labelled Fall.
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      TRUE ~ "Fall"
    )
  ) %>%
  group_by(season) %>%
  summarize(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  # Long format: one row per season and camper type, with "mean_" stripped
  # from the type label.
  pivot_longer(
    cols = starts_with("mean_"),
    names_to = "type_visit",
    values_to = "mean",
    names_prefix = "mean_"
  ) %>%
  ggplot(aes(x = season, y = mean)) +
  geom_col() +
  facet_grid(~type_visit) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # Title worded like the other regional plots ("Visitation in <Region> ...").
  labs(
    title = "Visitation in South by Season",
    x = "Season",
    y = "Average Visitation"
  )

south_plot

# Show the first six distinct activity names found in west rows.
region_data %>%
  filter(region == "west") %>%
  distinct(activities_name) %>%
  head(n = 6) %>%
  knitr::kable()
| activities_name |
|---|
| Arts and Culture |
| Astronomy |
| Stargazing |
| Biking |
| Camping |
| Backcountry Camping |
# Bar chart of mean tent, backcountry and RV camper counts per season for
# west rows, with one facet per camper type.
west_plot <- region_data %>%
  filter(region == "west") %>%
  mutate(
    # Months 12, 1, 2 -> Winter; 3-5 -> Spring; 6-8 -> Summer. Every other
    # value of `month` (including a missing one) is labelled Fall.
    season = case_when(
      month %in% c(1, 2, 12) ~ "Winter",
      month %in% 3:5 ~ "Spring",
      month %in% 6:8 ~ "Summer",
      TRUE ~ "Fall"
    )
  ) %>%
  group_by(season) %>%
  summarise(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  # Long format: one row per season and camper type, with "mean_" stripped
  # from the type label.
  pivot_longer(
    cols = starts_with("mean_"),
    names_prefix = "mean_",
    names_to = "type_visit",
    values_to = "mean"
  ) %>%
  ggplot(aes(x = season, y = mean)) +
  geom_col() +
  facet_grid(. ~ type_visit) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    title = "Visitation in West by Season",
    x = "Season",
    y = "Average Visitation"
  )

# Show the four regional plots together.
# NOTE(review): adding plot objects with `+` presumably relies on a
# plot-composition package such as patchwork being attached - confirm.
northeast_plot + midwest_plot + south_plot + west_plot

Across all regions, summer appears to have the highest visitation, with the exception of backcountry hiking, which peaks in spring.
For the Northeastern U.S., tent camping is by far the most popular visitation type. Interestingly, in this region of the United States, there was hardly any visitation in winter for any of the three visitation types. This might be because the Northeastern U.S. typically has colder temperatures and possibly snow during this time.
For the Midwestern U.S., tent camping is the most popular visitation type, especially in the summer, but it is closely followed by backcountry hiking in summer. RV camping does seem to be higher in the Midwest than in the northeastern region, especially during the summer. As before, there is hardly any visitation in the winter, and actually none for RV camping. This again might be due to weather conditions not permitting outdoor activities such as these. For the Southern U.S., tent camping has the highest average visitation in the summer time, closely followed by RV camping. In the south, RV camping is much more popular than in other regions around the U.S. Backcountry is lower, but does have an almost even spread among the seasons. This could be due to the more temperate conditions in the winter and other times of the year, allowing individuals to enjoy this activity year round.
For the Western U.S., tent camping once again has the highest average visitation across all seasons. Interestingly, backcountry hiking and RV camping are equal in the summertime in this region. This region has some visitation in the winter, but less than the southern region.
# Mean recreation visits per region, largest first.
# NOTE(review): region_data holds one row per park activity/topic combination
# for each visitation record, so this mean is weighted by how many
# activity/topic pairs a park has - confirm that is intended.
region_data %>%
  group_by(region) %>%
  summarise(avg_visitation = mean(recreation_visits, na.rm = TRUE)) %>%
  arrange(desc(avg_visitation)) %>%
  knitr::kable()
| region | avg_visitation |
|---|---|
| west | 149106.067 |
| south | 95613.721 |
| northeast | 83202.371 |
| midwest | 44547.512 |
| u.s. territory | 31867.809 |
| NA | 4264.793 |
# Long-format copy of region_data: the four visit-count columns are stacked
# into a `visit_type` label column and a `count` value column.
region_long <- pivot_longer(
  region_data,
  cols = c(recreation_visits, tent_campers, rv_campers, backcountry),
  names_to = "visit_type",
  values_to = "count"
)
The western region of the U.S. seems to have the highest average visitation of all regions, followed by the southern region.
Looking at the number of parks in each region will help determine whether this is because there are more parks in this region, or whether something else is going on.
# Count the distinct park codes that appear in each region.
region_data %>%
  group_by(region) %>%
  summarise(parks = n_distinct(park_code)) %>%
  knitr::kable()
| region | parks |
|---|---|
| midwest | 46 |
| northeast | 68 |
| south | 117 |
| u.s. territory | 7 |
| west | 116 |
| NA | 122 |
From this table, we can see that the southern region actually has the most parks in this dataset, followed by the western region. So this trend in visitation is not just due to the number of parks in a specific region.
# Bar chart of mean tent, backcountry and RV camper counts per region, with
# one facet per camper type.
region_data %>%
  group_by(region) %>%
  summarise(
    mean_tent = mean(tent_campers, na.rm = TRUE),
    mean_backcountry = mean(backcountry, na.rm = TRUE),
    mean_rv = mean(rv_campers, na.rm = TRUE)
  ) %>%
  # Long format: one row per region and camper type, with "mean_" stripped
  # from the type label.
  pivot_longer(
    cols = starts_with("mean_"),
    names_prefix = "mean_",
    names_to = "type_visit",
    values_to = "mean"
  ) %>%
  ggplot(aes(x = region, y = mean)) +
  geom_col() +
  facet_grid(. ~ type_visit) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Now looking at the specific visits by region, we can see once again that the west has the highest mean visitation among all the regions, with its highest being tent camping. Unsurprisingly, we see that the south has the second highest mean visitation. Interestingly, in backcountry hiking, the midwest has the second highest mean visitation, despite the fact that it has one of the lowest numbers of parks in the dataset.
# Add a display region to the park table based on the API's `states` column,
# which is compared against single two-letter codes below.
regional_data <- NPS_data %>%
  mutate(
    region = case_when(
      states %in% c("CT", "RI", "NH", "VT", "NJ", "NY",
                    "PA", "MD", "ME", "MA") ~ "Northeast",
      states %in% c("IL", "IN", "MI", "OH", "WI", "IA", "KS",
                    "MN", "MO", "NE", "ND", "SD") ~ "Midwest",
      states %in% c("FL", "GA", "NC", "SC", "VA", "DE", "WV",
                    "AL", "KY", "MS", "TN", "AR", "LA", "OK", "TX", "DC") ~ "South",
      states %in% c("AK", "CA", "HI", "OR", "WA", "AZ",
                    "CO", "ID", "MT", "NV", "NM", "UT", "WY") ~ "West",
      states %in% c("VI", "AS", "GU", "PR") ~ "U.S. Territory",
      # Per-row test for a multi-state entry. The previous condition,
      # length(unique(states)) > 1, was a single value computed over the whole
      # column and so labelled every remaining row as multi-state.
      # NOTE(review): assumes multi-state parks list their codes separated by
      # commas (e.g. "ID,MT,WY") - confirm against the API output.
      grepl(",", states, fixed = TRUE) ~ "Across Multiple States",
      # Missing or unlisted single codes get an explicit label instead of
      # being counted as multi-state.
      TRUE ~ "Other"
    )
  )
# Geo layout options for the map: U.S. scope with an Albers USA projection,
# land shown in a light fill, and thin country/subunit borders.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showland = TRUE,
  landcolor = toRGB("#e5ecf6"),
  countrywidth = 0.5,
  subunitwidth = 0.5
)

# One circle marker per row of regional_data, placed by latitude/longitude and
# coloured by region; hovering shows the park name and its states on two lines.
fig <- regional_data %>%
  plot_geo(lat = ~latitude, lon = ~longitude) %>%
  add_markers(
    text = ~paste(full_name, states, sep = "<br />"),
    color = ~region,
    symbol = I("circle"),
    size = I(8),
    hoverinfo = "text"
  ) %>%
  layout(title = "US National Parks", geo = g)

fig